import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
# Load the Flipkart reviews CSV and preview its first rows.
csv_path = 'C:/Users/Rakesh/Datasets/flipkart_reviews.csv'
data = pd.read_csv(csv_path)
data.head()
| | Product_name | Review | Rating |
|---|---|---|---|
| 0 | Lenovo Ideapad Gaming 3 Ryzen 5 Hexa Core 5600... | Best under 60k Great performanceI got it for a... | 5 |
| 1 | Lenovo Ideapad Gaming 3 Ryzen 5 Hexa Core 5600... | Good perfomence... | 5 |
| 2 | Lenovo Ideapad Gaming 3 Ryzen 5 Hexa Core 5600... | Great performance but usually it has also that... | 5 |
| 3 | DELL Inspiron Athlon Dual Core 3050U - (4 GB/2... | My wife is so happy and best product 👌🏻😘 | 5 |
| 4 | DELL Inspiron Athlon Dual Core 3050U - (4 GB/2... | Light weight laptop with new amazing features,... | 5 |
# Count the missing values in each column.
data.isna().sum()
Product_name 0 Review 0 Rating 0 dtype: int64
# Prerequisites for clean(): the regex/string helpers, the English stopword
# set, and a Snowball stemmer.
import re
import string

import nltk
from nltk.corpus import stopwords

# The stopword corpus must be present locally before it can be read.
nltk.download('stopwords')
stopword = set(stopwords.words('english'))
stemmer = nltk.SnowballStemmer('english')
[nltk_data] Downloading package stopwords to [nltk_data] C:\Users\Rakesh\AppData\Roaming\nltk_data... [nltk_data] Package stopwords is already up-to-date!
def clean(text):
    """Normalise a piece of review text for analysis.

    The input is converted to ``str`` and lower-cased, then stripped of
    square-bracketed spans, URLs, HTML-like tags, ASCII punctuation,
    newlines and any token containing a digit. The remaining
    space-separated words are filtered against the module-level
    ``stopword`` set and stemmed with the module-level ``stemmer``.

    Args:
        text: Value to clean; anything accepted by ``str()``.

    Returns:
        The cleaned words joined by single spaces.
    """
    text = str(text).lower()
    # Raw strings keep the regex escapes (\[, \S, \w, \d) from being parsed
    # as invalid string escapes.
    text = re.sub(r'\[.*?\]', '', text)
    text = re.sub(r'https?://\S+|www\.\S+', '', text)
    text = re.sub(r'<.*?>+', '', text)
    text = re.sub('[%s]' % re.escape(string.punctuation), '', text)
    # NOTE: newlines are deleted, not replaced by a space, so words on
    # adjacent lines are fused together.
    text = re.sub('\n', '', text)
    text = re.sub(r'\w*\d\w*', '', text)
    # Splitting on a single space keeps empty tokens from repeated spaces;
    # filter and stem in one pass instead of joining and re-splitting.
    words = [
        stemmer.stem(word)
        for word in text.split(' ')
        if word not in stopword
    ]
    return " ".join(words)
import plotly.express as px

# Replace every review with its cleaned form.
data['Review'] = data['Review'].apply(clean)

# Donut chart of how many reviews carry each rating value.
ratings = data['Rating'].value_counts()
numbers = ratings.index
quantity = ratings.values
figure = px.pie(data, values=quantity, names=numbers, hole=0.5)
figure.show()
# Join all reviews into one string and render it as a word cloud.
text = " ".join(data['Review'])
# Use a distinct name here: rebinding `stopwords` would shadow the
# nltk.corpus `stopwords` module imported earlier in this file.
wordcloud_stopwords = set(STOPWORDS)
wordcloud = WordCloud(
    stopwords=wordcloud_stopwords,
    background_color='white',
).generate(text)
plt.figure(figsize=(15, 10))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')
plt.show()
# The VADER lexicon must be present locally before the analyzer is built.
nltk.download('vader_lexicon')
sentiments = SentimentIntensityAnalyzer()

# Score each review once and reuse the result for all three columns,
# instead of running the analyzer three times per review.
scores = [sentiments.polarity_scores(review) for review in data['Review']]
data['Positive'] = [score['pos'] for score in scores]
data['Negative'] = [score['neg'] for score in scores]
data['Neutral'] = [score['neu'] for score in scores]

# Keep only the review text and its sentiment scores.
data = data[["Review", "Positive", "Negative", "Neutral"]]
print(data.head())
[nltk_data] Downloading package vader_lexicon to [nltk_data] C:\Users\Rakesh\AppData\Roaming\nltk_data... [nltk_data] Package vader_lexicon is already up-to-date!
Review Positive Negative \ 0 best great performancei got around backup bi... 0.395 0.101 1 good perfom 0.744 0.000 2 great perform usual also game laptop issu batt... 0.277 0.000 3 wife happi best product 👌🏻😘 0.512 0.000 4 light weight laptop new amaz featur batteri li... 0.000 0.000 Neutral 0 0.504 1 0.256 2 0.723 3 0.488 4 1.000
# Now let's see how most reviewers feel about Flipkart's products and services.
# Total each sentiment column over all reviews (x: positive, y: negative,
# z: neutral).
x, y, z = (sum(data[column]) for column in ("Positive", "Negative", "Neutral"))
def sentiment_score(a, b, c):
    """Print the label of the dominant sentiment.

    Exactly one label is printed. A score wins only if it is strictly
    greater than both others; every other case, including ties, is
    reported as neutral.

    Args:
        a: Positive score.
        b: Negative score.
        c: Neutral score.
    """
    if a > b and a > c:
        print('Positive😊')
    # `elif`, not `if`: otherwise a positive result would also fall into the
    # `else` below and print the neutral label as well.
    elif b > a and b > c:
        print('Negative😠')
    else:
        print('Neutral🙂')
# Report the overall sentiment from the positive, negative and neutral totals.
sentiment_score(x,y,z)
Neutral🙂
# Show the raw totals behind the overall verdict.
for label, total in (("Positive: ", x), ("Negative: ", y), ("Neutral: ", z)):
    print(label, total)
Positive: 923.5529999999985 Negative: 96.77500000000013 Neutral: 1283.6880000000006